Clean data

Data for this study come from the subset of responses collected on the SAPA-project.org website between February 17, 2017 and July 22, 2019. The initial date is the day that the semi-random presentation of items to participants was changed to increase presentation of SPI-135 items, which are the basis for personality measurement in this study. This period also represents a new period of data collection on SAPA containing data that are not available in the public domain at the time of analysis. The end date of data collection was the first day following preregistration of analysis that the authors were able to analyze data.

set.seed(052319)

# load packages
packages = c("tidyverse", "janitor", "psych", "devtools", 
             "PAutilities", "measurements", "here", "caret")
lapply(packages, library, character.only = TRUE)
rm(packages)

#read in data
load(here("../../data/SAPA/collaboration/SAPAdata07feb2017thru22jul2019forSara2.rdata"))
sapa = SAPAdata07feb2017thru22jul2019x

source(here("scripts/personality_scales.R"))
keys = read.csv("data/superKey.csv", header = TRUE, row.names = 1)

# super key -- this contains the master key list for all of SAPA. every item ever administered and every scale you can score
# each row is a single item
# each column is a scale
# the value of a cell is 0 if that item is not part of that scale, 1 if that item positively loads on the scale, and -1 if the item negatively loads on the scale

Participants were included in the analysis if they were under the age of 18, from the United States, and had reported their biological sex at birth, height, and weight.

# remove participants who are 18 years or older and from the US
sapa = sapa %>%
  filter(age < 18) %>%
  filter(country == "USA") %>%
  filter(!is.na(sex)) %>%
  filter(!is.na(height)) %>%
  filter(!is.na(weight))

Education was rescored to represent years of education. All parental SES variables – education, estimated income and estimated prestige, were standardized to the sample and averaged to create a single index of parental SES.

# make sure occupational variables are numeric
sapa = sapa %>%
  mutate_at(vars(matches("^(p)\\d(occ)")), as.numeric)

#or years
sapa = sapa %>%
  mutate(p1edu = case_when(
  p1edu == "less12yrs" ~ "6", 
  p1edu == "HSgrad" ~ "12", 
  p1edu == "SomeCollege" ~ "14", 
  p1edu == "CurrentInUniv" ~ "14", 
  p1edu == "AssociateDegree" ~ "14", 
  p1edu == "CollegeDegree" ~ "16", 
  p1edu == "InGradOrProSchool" ~ "18", 
  p1edu == "GradOrProDegree" ~ "20")) 

sapa = sapa %>%
  mutate(p2edu = case_when(
    p2edu == "less12yrs" ~ "6", 
    p2edu == "HSgrad" ~ "12", 
    p2edu == "SomeCollege" ~ "14", 
    p2edu == "CurrentInUniv" ~ "14",   
    p2edu == "AssociateDegree" ~ "14", 
    p2edu == "CollegeDegree" ~ "16", 
    p2edu == "InGradOrProSchool" ~ "18", 
    p2edu == "GradOrProDegree" ~ "20")) 

sapa$p1edu = as.numeric(sapa$p1edu)
sapa$p2edu = as.numeric(sapa$p2edu)

sapa = sapa %>%
  filter(!is.na(p1edu) | !is.na(p2edu) |
           !is.na(p1occIncomeEst) | !is.na(p2occIncomeEst) |
           !is.na(p1occPrestige) | !is.na(p2occPrestige))

#estimate SES composite

sapa = sapa %>%
  mutate(z.p1edu = scale(p1edu),
         z.p2edu = scale(p2edu),
         z.p1occIncomeEst = scale(p1occIncomeEst),
         z.p2occIncomeEst = scale(p2occIncomeEst),
         z.p2occPrestige = scale(p1occPrestige),
         z.p2occPrestige = scale(p2occPrestige)) 

sapa$ses = rowMeans(sapa[,grepl("^z\\.", names(sapa))], na.rm=T)

sapa = sapa %>%
  dplyr::select(-starts_with("z"))

Big Five traits were scored using a sum-score method, averaged across non-missing responses.

# select just the rows that correspond to variables in the current SAPA dataset
vars = names(sapa)
keys = keys[rownames(keys) %in% vars, ]

# select just the Big 5 scales that are scored using the SPI_135 form 
bfkeys = keys %>%
  select(contains("SPI_135")) %>%
  select(1:5) 

bfkeys = keys2list(as.matrix(bfkeys), sign = T)


# score the items (this contains item and scale statistics too!)
b5scored = scoreItems(keys = bfkeys, items = sapa)

# add scores to SAPA
b5scores = as.data.frame(b5scored$scores[,1:5])
names(b5scores) = gsub("135_27_5_", "", names(b5scores))
sapa = cbind(sapa, b5scores)

The narrower traits, the SPI-27, were scored using IRT scoring. Calibration parameters were taken from a different dataset and are available on request.

load(here("../../data/SAPA/IRTinfoICAR.rdata"))

# IRT score
dataSet <- subset(sapa, select = c(orderForItems))
ICARirtScores <- matrix(nrow=dim(dataSet)[1], ncol=5)
ICARirtSEs <- matrix(nrow=dim(dataSet)[1], ncol=5)

for (i in 1:length(IRToutputICAR)) {
  data <- subset(dataSet, select = c(names(IRToutputICAR[[i]]$irt$difficulty[[1]])))
  calibrations <- IRToutputICAR[[i]]
  scored <- scoreIrt(calibrations, data, keys = NULL, cut = 0)
  irt.data <- irt.se(calibrations, score = as.matrix(scored[,1]))
  TScoring <- (irt.data[,"scores"]-thetaNormsMeans[i])/thetaNormsSDs[i]
  TScores <- TScoring*10+50
  ICARirtScores[,i] <- TScores
  TScoreSEs <- irt.data[,"se"]*10
  ICARirtSEs[,i] <- TScoreSEs
  rm(TScores, TScoring, TScoreSEs, scored, calibrations, data)
}
ICARirtScores <- as.data.frame(ICARirtScores)
colnames(ICARirtScores) <- scaleNames
ICARirtSEs <- as.data.frame(ICARirtSEs)
colnames(ICARirtSEs) <- scaleNames
rm(IRToutputICAR, thetaNormsMeans, thetaNormsSDs, scaleNames)

#add to sapa dataset
sapa = cbind(sapa, ICARirtScores)
# remove individual items
sapa = sapa %>%
  select(-contains("q_"))

Cognition was also scored using IRT scoring, with calibrations from a separate dataset.

load(here("../../data/SAPA/IRTinfoICAR.rdata"))

# IRT score
dataSet <- subset(sapa, select = c(orderForItems))
ICARirtScores <- matrix(nrow=dim(dataSet)[1], ncol=5)
ICARirtSEs <- matrix(nrow=dim(dataSet)[1], ncol=5)

for (i in 1:length(IRToutputICAR)) {
  data <- subset(dataSet, select = c(names(IRToutputICAR[[i]]$irt$difficulty[[1]])))
  calibrations <- IRToutputICAR[[i]]
  scored <- scoreIrt(calibrations, data, keys = NULL, cut = 0)
  irt.data <- irt.se(calibrations, score = as.matrix(scored[,1]))
  TScoring <- (irt.data[,"scores"]-thetaNormsMeans[i])/thetaNormsSDs[i]
  TScores <- TScoring*10+50
  ICARirtScores[,i] <- TScores
  TScoreSEs <- irt.data[,"se"]*10
  ICARirtSEs[,i] <- TScoreSEs
  rm(TScores, TScoring, TScoreSEs, scored, calibrations, data)
}
ICARirtScores <- as.data.frame(ICARirtScores)
colnames(ICARirtScores) <- scaleNames
ICARirtSEs <- as.data.frame(ICARirtSEs)
colnames(ICARirtSEs) <- scaleNames
rm(IRToutputICAR, thetaNormsMeans, thetaNormsSDs, scaleNames)

#add to sapa dataset
sapa = cbind(sapa, ICARirtScores)
# remove individual items
sapa = sapa %>%
  select(-contains("q_"))

BMI percentile represents a participant’s percentile score on BMI relative to others of their assigned sex at birth. These were estimated from the PAutilities package, developed by WHO Multicentre Growth Reference Study (MGRS) information about the development of these reference standards can be found at https://www.cdc.gov/obesity/childhood/defining.html. These standards in turn were develoed using the 2000 CDC growth charts, based on data from 5 national health examination surveys that occurred from 1963 to 1994 and supplemental data from surveys that occurred from 1960 to 1995.

Kuczmarski RJ, Ogden CL, Guo SS, et al. 2000 CDC growth charts for the United States: methods and development. National Center for Health Statistics. Vital Health Stat 11. 2002;(246):1-190

BMI category is assigned based on BMI percentile: participants in the bottom 10% are labeled Underweight, between the top 10% and 5% are Overweight, and top 5% are Obese. All others are labelled Normal.

All analyses were perfomed separately by gender.

sapa = sapa %>%
  mutate(cog = ICAR60) %>%
  select(sex, BMI, BMI_p, BMI_c, p1edu, 
         p1occPrestige, p1occIncomeEst, p2edu, 
         p2occPrestige, p2occIncomeEst, ses, cog, contains("SPI"))

sapa_male = sapa %>%
  filter(sex == "male") %>%
  dplyr::select(-sex) 

sapa_female = sapa %>%
  filter(sex == "female") %>%
  dplyr::select(-sex)

save(b5scored, file = here("data/alpha.Rdata"))

The datasets were split into training (75%) and test (25%) sets; all regression models are estimated using the training sets. The test set was reserved to estimate model accuarcy, comparing models with different sets of individual differences.

# set seed
set.seed(090919)

# parition into training and test sets. objects identify just training rows
train_male = createDataPartition(sapa_male$BMI_c, p = .75, list = FALSE)
train_female = createDataPartition(sapa_female$BMI_c, p = .75, list = FALSE)

Descriptive Statistics

Univariate descriptives

Descriptive statistics are estimated using the psych package.

descriptives = describeBy(sapa, group = "sex")
library(kableExtra)
#pull descriptives into a list
descriptives.df = data.frame(gender = names(descriptives))
descriptives.df$data = descriptives

#add variable names and unnest
descriptives.df = descriptives.df %>% 
  mutate(data = map(data, function(x) mutate(x, vars = rownames(x)))) %>%
  unnest(cols = c(data))

Descriptives Table by Gender

Female

descriptives.df %>%
  filter(gender == "female") %>%
  select(-gender) %>%
  kable(., digits = 2) %>%
  kable_styling()
vars n mean sd median trimmed mad min max range skew kurtosis se
sex* 6530 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 0.00
BMI 6502 23.07 5.00 21.85 22.38 3.65 15.01 52.78 37.77 1.65 3.85 0.06
BMI_p 6530 62.70 27.61 67.80 64.88 30.99 0.00 99.80 99.80 -0.53 -0.80 0.34
BMI_c* 6530 Inf -Inf -Inf
p1edu 6249 15.25 3.58 16.00 15.52 2.97 6.00 20.00 14.00 -0.64 0.56 0.05
p1occPrestige 5723 60.76 14.64 67.12 62.14 13.55 24.22 79.09 54.87 -0.82 -0.42 0.19
p1occIncomeEst 5594 61625.23 21784.89 60244.00 60512.80 20228.59 21980.00 112490.00 90510.00 0.39 -0.24 291.27
p2edu 5876 14.62 3.66 14.00 14.87 2.97 6.00 20.00 14.00 -0.53 0.36 0.05
p2occPrestige 4818 57.87 15.76 63.85 58.75 16.23 24.22 79.09 54.87 -0.47 -1.04 0.23
p2occIncomeEst 4729 59058.07 22926.91 55790.00 57519.07 24640.81 21980.00 112490.00 90510.00 0.53 -0.34 333.40
ses 6528 -0.04 0.76 0.05 0.00 0.76 -2.56 2.32 4.89 -0.62 0.43 0.01
cog 6507 48.06 9.19 48.02 48.00 9.73 18.46 76.53 58.07 0.03 -0.35 0.11
SPI_Agree 6530 4.26 0.67 4.36 4.31 0.53 1.00 6.00 5.00 -0.81 1.35 0.01
SPI_Consc 6530 3.87 0.67 3.86 3.87 0.64 1.00 6.00 5.00 -0.05 0.36 0.01
SPI_Extra 6530 3.61 0.83 3.64 3.62 0.85 1.00 6.00 5.00 -0.09 -0.02 0.01
SPI_Neuro 6530 4.31 0.77 4.36 4.33 0.74 1.29 6.00 4.71 -0.42 0.28 0.01
SPI_Open 6530 4.48 0.55 4.50 4.49 0.53 2.07 6.00 3.93 -0.24 0.31 0.01
SPI_Compassion 6530 51.31 9.51 53.78 52.56 8.67 15.93 63.13 47.20 -1.15 1.12 0.12
SPI_Irritability 6530 50.97 9.87 51.27 51.15 11.81 27.87 72.50 44.63 -0.13 -0.96 0.12
SPI_Sociability 6530 49.82 10.02 52.22 51.19 8.27 15.89 66.44 50.54 -1.36 1.94 0.12
SPI_WellBeing 6488 49.03 9.80 48.95 49.04 11.22 26.47 74.40 47.94 -0.01 -0.84 0.12
SPI_SensationSeeking 6488 49.55 10.03 49.00 49.26 11.45 29.19 71.62 42.43 0.21 -0.88 0.12
SPI_Anxiety 6530 51.88 8.90 54.46 53.24 7.80 19.27 62.10 42.82 -1.26 1.19 0.11
SPI_Honesty 6530 50.65 9.56 52.54 51.80 7.38 3.87 77.00 73.13 -1.70 4.87 0.12
SPI_Industry 6530 50.29 10.01 49.64 50.18 11.27 27.91 73.23 45.31 0.10 -0.80 0.12
SPI_Intellect 6530 49.36 10.20 51.63 50.56 9.26 14.10 65.73 51.63 -1.04 0.75 0.13
SPI_Creativity 6530 49.72 10.03 51.03 50.57 10.36 18.38 64.67 46.29 -0.70 0.03 0.12
SPI_Impulsivity 6488 49.85 10.09 49.16 49.47 11.50 31.84 72.72 40.88 0.27 -0.82 0.13
SPI_AttentionSeeking 6530 49.68 10.08 52.47 50.98 8.25 26.43 66.95 40.51 -1.00 0.12 0.12
SPI_Order 6529 50.18 10.06 49.75 49.98 11.63 25.20 74.44 49.24 0.15 -0.87 0.12
SPI_Authoritarianism 6489 50.55 9.69 52.46 51.61 8.97 15.22 67.09 51.87 -0.95 0.52 0.12
SPI_Charisma 6487 49.74 10.00 50.32 50.11 10.90 22.58 71.58 49.00 -0.30 -0.57 0.12
SPI_Trust 6530 49.83 10.00 50.04 49.87 10.68 27.80 73.20 45.40 -0.03 -0.71 0.12
SPI_Humor 6489 50.65 9.62 52.72 51.87 8.77 7.52 64.74 57.21 -1.25 1.81 0.12
SPI_EmotionalExpressiveness 6489 50.36 10.04 50.04 50.12 12.09 32.70 71.81 39.11 0.16 -1.01 0.12
SPI_ArtAppreciation 6488 51.35 8.94 53.85 52.92 5.09 15.08 72.72 57.65 -2.11 5.33 0.11
SPI_Introspection 6529 50.04 9.89 51.98 51.19 9.80 15.16 62.58 47.43 -0.95 0.48 0.12
SPI_Perfectionism 6530 50.73 9.83 51.36 51.18 10.47 19.29 69.55 50.25 -0.38 -0.44 0.12
SPI_SelfControl 6485 49.31 9.86 49.50 49.30 10.53 25.93 74.34 48.40 0.01 -0.56 0.12
SPI_Conformity 6489 50.59 9.93 51.34 50.83 10.79 28.22 71.14 42.92 -0.20 -0.77 0.12
SPI_Adaptability 6488 49.44 10.12 48.75 49.39 11.32 28.77 70.29 41.52 0.06 -0.88 0.13
SPI_EasyGoingness 6485 49.56 9.99 50.80 50.22 9.63 13.49 69.53 56.03 -0.61 0.09 0.12
SPI_EmotionalStability 6529 48.20 10.02 50.26 48.89 9.90 28.98 68.44 39.46 -0.56 -0.68 0.12
SPI_Conservatism 6480 49.54 10.13 51.27 50.42 10.20 28.73 70.97 42.24 -0.63 -0.46 0.13

Male

descriptives.df %>%
  filter(gender == "male") %>%
  select(-gender) %>%
  kable(., digits = 2) %>%
  kable_styling()
vars n mean sd median trimmed mad min max range skew kurtosis se
sex* 2952 2.00 0.00 2.00 2.00 0.00 2.00 2.00 0.00 0.00
BMI 2934 22.84 4.90 21.62 22.21 3.69 15.01 53.76 38.75 1.48 3.05 0.09
BMI_p 2952 60.00 30.53 64.60 62.02 37.95 0.00 99.90 99.90 -0.40 -1.07 0.56
BMI_c* 2952 Inf -Inf -Inf
p1edu 2821 15.19 3.64 16.00 15.49 2.97 6.00 20.00 14.00 -0.67 0.53 0.07
p1occPrestige 2620 60.20 15.22 67.12 61.55 13.55 24.22 79.09 54.87 -0.75 -0.62 0.30
p1occIncomeEst 2570 61491.45 22195.84 60244.00 60663.71 22141.15 21980.00 112490.00 90510.00 0.29 -0.44 437.83
p2edu 2660 14.71 3.58 16.00 14.95 2.97 6.00 20.00 14.00 -0.60 0.55 0.07
p2occPrestige 2147 57.07 15.59 57.98 58.02 18.55 24.22 79.09 54.87 -0.50 -0.98 0.34
p2occIncomeEst 2111 57247.11 22364.35 52210.00 55755.02 24789.07 21980.00 112490.00 90510.00 0.60 -0.04 486.76
ses 2952 -0.06 0.78 0.05 -0.01 0.75 -2.56 2.32 4.89 -0.62 0.48 0.01
cog 2948 49.65 9.60 49.71 49.68 10.12 19.86 76.53 56.68 -0.06 -0.37 0.18
SPI_Agree 2952 4.11 0.70 4.21 4.17 0.53 1.00 6.00 5.00 -1.00 1.88 0.01
SPI_Consc 2952 3.77 0.63 3.79 3.77 0.53 1.43 6.00 4.57 -0.01 0.64 0.01
SPI_Extra 2952 3.62 0.81 3.64 3.63 0.74 1.00 6.00 5.00 -0.20 0.24 0.01
SPI_Neuro 2952 3.79 0.81 3.86 3.81 0.74 1.00 6.00 5.00 -0.27 0.29 0.01
SPI_Open 2952 4.55 0.53 4.57 4.56 0.53 2.21 6.00 3.79 -0.27 0.50 0.01
SPI_Compassion 2952 47.10 10.40 48.58 48.07 9.76 15.93 63.13 47.20 -0.83 0.40 0.19
SPI_Irritability 2952 47.83 9.94 47.40 47.58 11.58 27.87 72.50 44.63 0.18 -0.91 0.18
SPI_Sociability 2952 50.46 9.87 53.03 51.88 7.51 15.89 66.44 50.54 -1.51 2.53 0.18
SPI_WellBeing 2925 52.24 10.04 53.13 52.67 11.35 26.47 74.40 47.94 -0.30 -0.77 0.19
SPI_SensationSeeking 2921 50.98 9.86 50.67 50.85 10.93 30.74 71.42 40.68 0.11 -0.88 0.18
SPI_Anxiety 2952 45.79 10.97 47.72 46.64 11.56 17.73 62.10 44.37 -0.58 -0.59 0.20
SPI_Honesty 2952 48.62 10.74 50.53 49.85 8.06 3.87 77.00 73.13 -1.49 3.79 0.20
SPI_Industry 2952 49.39 9.93 48.86 49.13 10.98 28.29 73.92 45.64 0.20 -0.73 0.18
SPI_Intellect 2952 51.43 9.39 53.71 52.62 8.79 14.10 65.73 51.63 -1.15 1.25 0.17
SPI_Creativity 2952 50.56 9.92 52.20 51.47 10.46 18.38 64.67 46.29 -0.77 0.17 0.18
SPI_Impulsivity 2921 50.31 9.78 49.82 49.99 10.76 31.84 72.72 40.88 0.24 -0.76 0.18
SPI_AttentionSeeking 2952 50.72 9.77 53.52 52.13 7.58 26.43 65.70 39.27 -1.14 0.54 0.18
SPI_Order 2951 49.64 9.87 49.08 49.39 11.12 25.20 74.44 49.24 0.18 -0.77 0.18
SPI_Authoritarianism 2925 48.87 10.55 51.21 49.93 9.93 13.46 67.09 53.63 -0.86 0.23 0.20
SPI_Charisma 2926 50.59 9.97 51.49 51.01 10.96 21.90 71.58 49.68 -0.34 -0.54 0.18
SPI_Trust 2952 50.44 9.94 50.96 50.57 10.52 27.80 73.20 45.40 -0.10 -0.61 0.18
SPI_Humor 2923 48.57 10.62 50.33 49.73 10.16 7.52 64.74 57.21 -1.02 1.04 0.20
SPI_EmotionalExpressiveness 2923 49.18 9.84 48.23 48.76 11.20 32.70 71.81 39.11 0.31 -0.88 0.18
SPI_ArtAppreciation 2921 46.96 11.47 50.20 48.80 7.78 15.08 72.72 57.65 -1.35 1.37 0.21
SPI_Introspection 2951 49.92 10.21 52.19 51.08 10.56 15.16 62.58 47.43 -0.88 0.19 0.19
SPI_Perfectionism 2952 48.37 10.18 48.80 48.70 10.96 18.72 69.55 50.82 -0.27 -0.47 0.19
SPI_SelfControl 2919 51.54 10.12 51.77 51.70 10.72 25.93 74.34 48.40 -0.13 -0.51 0.19
SPI_Conformity 2926 48.76 10.03 49.07 48.80 11.21 28.22 70.64 42.42 -0.03 -0.82 0.19
SPI_Adaptability 2921 51.24 9.60 51.22 51.44 10.68 28.77 70.29 41.52 -0.13 -0.75 0.18
SPI_EasyGoingness 2920 50.97 9.98 52.52 51.78 9.37 13.49 69.53 56.03 -0.75 0.33 0.18
SPI_EmotionalStability 2952 54.01 8.69 56.24 55.20 6.98 28.98 67.27 38.29 -1.22 1.17 0.16
SPI_Conservatism 2919 51.07 9.62 52.89 52.08 9.05 28.73 70.97 42.24 -0.79 -0.02 0.18

Bivariate

Figure

Female

library(corrplot)
corrplot(R_female, method = "square", 
         title = "\nZero-order correlations among study variables\nFemale Participants", 
         tl.col = "black", 
         mar=c(0,0,1,0))

Male

corrplot(R_male, method = "square", 
         title = "\nZero-order correlations among study variables\nMale Participants", 
         tl.col = "black", 
         mar=c(0,0,1,0))

Logistic Regression (SES)

Multinomial logistic regression models were built that regressed BMI category onto parental socio-economic status and adolescent individual differences. Two basic models were constructed: one that hypothesized parental SES:

\[BMI_i = b_0 + b_1(SES_i) + b_2(ID_i) + e_i\]

and an individual difference were two independent predictors of BMI, and a second that hypothesized these variables interacted with each other:

\[BMI_i = b_0 + b_1(SES_i) + b_2(ID_i) + b_3(SES_i\times ID_i) + e_i\] We iterated through all individual differences – the broad Big Five personality traits, the narrow SPI-27 traits, and cognitive ability – and tested each one independently in the model as an individual difference.

Models were estimated separately for men and women.

# end goal of wrangling is a data frame of data frames
# nested dataframes correspond to a single personality trait
# score refers to a participant's score on that trait
# we also standardize each of our variables within gender


sapa_male_trait = sapa_male %>%
  dplyr::select(-starts_with("p1"), -starts_with("p2"), -starts_with("edu")) %>%
  mutate(BMI_c = factor(BMI_c, levels = c("Normal Weight", "Underweight", "Overweight", "Obese"))) %>%
  mutate(set = ifelse(row_number() %in% train_male[,1], "train", "test")) %>%
  gather("trait_name", "trait_score", -ses, -BMI_c, -BMI, -BMI_p, -set) %>%
  group_by(trait_name, set) %>%
  mutate(trait_score = scale(trait_score)) %>%
  ungroup() %>%
  group_by(trait_name) %>%
  nest()

sapa_female_trait = sapa_female %>%
  dplyr::select(-starts_with("p1"), -starts_with("p2"), -starts_with("edu")) %>%
  mutate(BMI_c = factor(BMI_c, levels = c("Normal Weight", "Underweight", "Overweight", "Obese"))) %>%
  mutate(set = ifelse(row_number() %in% train_male[,1], "train", "test")) %>%
  gather("trait_name", "trait_score", -ses, -BMI_c, -BMI, -BMI_p, -set) %>%
  group_by(trait_name, set) %>%
  mutate(trait_score = scale(trait_score)) %>%
  ungroup() %>%
  group_by(trait_name) %>%
  nest()

Controlling for personality

To estimate the effect of socioeconomic status on BMI category, we graph the estimates of the SES slope coefficient across all logistic regression models controlling for individual differences. This presents not only the average estimate across all models (solid line), but the range of estimates – a wide range suggests that the effect of SES on BMI is sensitive to the inclusion of different individual difference measures, while a narrow range suggests that the effect of SES on BMI is persistent through personality and cognition.

Female

Parental socioeconomic status positively predicted greater likelihood of all non-normal categories (Underweight, Overweight, and Obese) compared to Normal among girls. Adolescent girls living in higher SES households were, on average, 13% less likely to be Underweight, 31% less likely, and 45% less likely to be Obese compared to low SES counterparts.

Male

Parental socioeconomic status positively predicted greater likelihood of all non-normal categories (Underweight, Overweight, and Obese) compared to Normal among boys. Adolescent boys living in higher SES households were, on average, 15% less likely to be Underweight, 21% less likely, and 35% less likely to be Obese compared to low SES counterparts.

Interaction with personality

To estimate the joint effect of socioeconomic status and individual differences on BMI category, we graph the estimates of the interaction terms of SES by individual differences by BMI category. Like before, we present the average effect (solid black line) and the 95% confidence intervals for each model.

Female

Parental socioeconomic status positively predicted greater likelihood of all non-normal categories (Underweight, Overweight, and Obese) compared to Normal among girls. Adolescent girls living in higher SES households were, on average, -2% less likely to be Underweight, -5% less likely, and -3% less likely to be Obese compared to low SES counterparts.

Male

Parental socioeconomic status positively predicted greater likelihood of all non-normal categories (Underweight, Overweight, and Obese) compared to Normal among boys. Adolescent boys living in higher SES households were, on average, 3% more likely to be Underweight, -3% more likely, and 17% more likely to be Obese compared to low SES counterparts.

Code (Female)

female_ses_only = train(BMI_c ~ ses, data = sapa_female, 
                      subset = train_female, 
                      method = "multinom",
                      na.action = "na.exclude", 
                      trControl = ctrl)


female_log = sapa_female_trait %>%
  # train models on training subset; use mulintomial logistic regression; use specific formula
  mutate(
    cov = map(data, ~train(BMI_c ~ trait_score + ses, data = ., 
                           subset = train_female, 
                           method = "multinom",
                           na.action = "na.exclude", 
                           trControl = ctrl)),
    int = map(data, ~train(BMI_c ~ trait_score*ses, data = ., 
                           subset = train_female, 
                           method = "multinom",
                           na.action = "na.exclude", 
                           trControl = ctrl))) %>%
  gather("model", "output", cov, int) %>%
  # create test data from all rows not used in training
  mutate(test_data = map(data, .f = function(x) x[-train_female, ]),
         #extract reference (true) BMI categories from test data
         test_reference = map(test_data, "BMI_c"),
         # predict categories from model output; na.pass puts NAs in any row with missing data
         predicted = map2(output, test_data, predict, na.action = "na.pass"),
         # calculate accuracy, sensitivity, specificity, etc
         confusion = map2(predicted, test_reference, confusionMatrix),
         # extract final model coefficients
         final_mod = map(output, "finalModel"),
         # tidy output for printing
         coef = map(final_mod, broom::tidy, conf.int = TRUE))

Code (Male)

male_ses_only = train(BMI_c ~ ses, data = sapa_male, 
                 subset = train_male, 
                 method = "multinom",
                 maxit= 1000,
                 na.action = "na.exclude", 
                 trControl = ctrl)

accuracy = predict(male_ses_only, type="raw", newdata=sapa_male[-train_male, ])
postResample(sapa_male[-train_male, "BMI_c"], accuracy)

male_log = sapa_male_trait %>%
  # train models on training subset; use mulintomial logistic regression; use specific formula
  mutate(
    cov = map(data, ~train(BMI_c ~ trait_score + ses, data = ., 
                               subset = train_male, 
                               method = "multinom",
                               na.action = "na.exclude", 
                               trControl = ctrl)),
    int = map(data, ~train(BMI_c ~ trait_score*ses, data = ., 
                               subset = train_male, 
                               method = "multinom",
                               na.action = "na.exclude", 
                               trControl = ctrl))) %>%
  gather("model", "output", cov, int) %>%
  # create test data from all rows not used in training
  mutate(test_data = map(data, .f = function(x) x[-train_male, ]),
         #extract reference (true) BMI categories from test data
         test_reference = map(test_data, "BMI_c"),
         # predict categories from model output; na.pass puts NAs in any row with missing data
         predicted = map2(output, test_data, predict, na.action = "na.pass"),
         # calculate accuracy, sensitivity, specificity, etc
         confusion = map2(predicted, test_reference, confusionMatrix),
         # extract final model coefficients
         final_mod = map(output, "finalModel"),
         # tidy output for printing
         coef = map(final_mod, broom::tidy, conf.int = TRUE))

Logistic Regression (Individual differences)

Female

Trait Obese Overweight Underweight
Cognitive Ability 0.79 0.97 0.97
[0.60, 1.04] [0.76, 1.22] [0.84, 1.12]
SPI: 27 Factors
Compassion 0.99 1.02 1.12
[0.76, 1.29] [0.81, 1.29] [0.96, 1.31]
Irritability 1.18 1.16 1.08
[0.90, 1.55] [0.89, 1.51] [0.93, 1.25]
Sociability 0.75* 0.78* 0.66*
[0.58, 0.98] [0.62, 0.99] [0.57, 0.76]
Well Being 0.52* 0.53* 0.62*
[0.38, 0.70] [0.40, 0.71] [0.53, 0.72]
Sensation Seeking 1.03 1.13 1.11
[0.79, 1.35] [0.89, 1.44] [0.97, 1.28]
Anxiety 1.11 1.24 1.19*
[0.86, 1.44] [0.96, 1.59] [1.03, 1.37]
Honesty 0.72* 0.79* 0.94
[0.56, 0.94] [0.62, 0.99] [0.80, 1.11]
Industry 0.71* 0.88 0.76*
[0.54, 0.95] [0.69, 1.13] [0.65, 0.88]
Intellect 1.26 1.07 0.98
[0.96, 1.67] [0.84, 1.36] [0.85, 1.13]
Creativity 0.94 0.99 1.07
[0.72, 1.23] [0.79, 1.24] [0.92, 1.24]
Impulsivity 1.20 0.97 1.13
[0.93, 1.55] [0.76, 1.25] [0.98, 1.31]
Attention Seeking 0.83 1.16 0.77*
[0.63, 1.08] [0.89, 1.51] [0.67, 0.89]
Order 0.84 0.96 0.89
[0.62, 1.14] [0.75, 1.23] [0.76, 1.04]
Authoritarianism 1.06 1.02 0.87
[0.82, 1.37] [0.80, 1.29] [0.75, 1.01]
Charisma 1.02 0.93 0.84*
[0.79, 1.31] [0.73, 1.18] [0.73, 0.97]
Trust 0.78 0.90 0.79*
[0.58, 1.05] [0.72, 1.14] [0.68, 0.92]
Humor 1.65* 1.01 0.98
[1.14, 2.38] [0.81, 1.27] [0.85, 1.13]
Emotional Expressiveness 0.70* 0.73* 0.76*
[0.53, 0.92] [0.57, 0.92] [0.66, 0.87]
Art Appreciation 1.31 1.02 1.72*
[0.91, 1.89] [0.81, 1.29] [1.41, 2.09]
Introspection 1.06 0.85 1.25*
[0.81, 1.38] [0.67, 1.07] [1.07, 1.46]
Perfectionism 0.88 0.88 0.80*
[0.66, 1.18] [0.69, 1.11] [0.69, 0.92]
Self Control 0.49* 1.14 1.07
[0.35, 0.69] [0.89, 1.46] [0.92, 1.24]
Conformity 1.00 1.15 0.84*
[0.77, 1.29] [0.91, 1.45] [0.73, 0.97]
Adaptability 0.83 1.10 0.90
[0.63, 1.09] [0.87, 1.40] [0.77, 1.04]
Easy Goingness 1.60* 1.69* 1.26*
[1.17, 2.20] [1.29, 2.21] [1.08, 1.47]
Emotional Stability 0.80 0.89 0.75*
[0.63, 1.02] [0.70, 1.12] [0.65, 0.86]
Conservatism 0.81 1.00 0.86*
[0.61, 1.06] [0.78, 1.28] [0.74, 0.99]
SPI: 5 Factors
Agreeableness 0.81 1.03 0.91
[0.64, 1.04] [0.81, 1.32] [0.79, 1.06]
Conscientiousness 0.83 0.98 0.75*
[0.64, 1.09] [0.76, 1.26] [0.64, 0.88]
Extraversion 0.70* 0.89 0.75*
[0.54, 0.90] [0.71, 1.13] [0.65, 0.86]
Neuroticism 1.61* 1.27 1.39*
[1.20, 2.17] [0.96, 1.68] [1.20, 1.62]
Openness 1.06 1.04 1.24*
[0.81, 1.40] [0.82, 1.33] [1.07, 1.44]

Male

Trait Obese Overweight Underweight
Cognitive Ability 0.87 0.74* 0.90
[0.68, 1.12] [0.56, 0.98] [0.78, 1.04]
SPI: 27 Factors
Compassion 0.96 0.95 1.04
[0.77, 1.20] [0.73, 1.24] [0.90, 1.21]
Irritability 1.18 1.25 0.95
[0.92, 1.53] [0.95, 1.65] [0.82, 1.11]
Sociability 0.87 0.83 0.78*
[0.68, 1.10] [0.64, 1.08] [0.67, 0.90]
Well Being 0.81 1.02 0.75*
[0.62, 1.05] [0.79, 1.32] [0.65, 0.87]
Sensation Seeking 0.83 1.25 0.74*
[0.63, 1.08] [0.94, 1.65] [0.63, 0.87]
Anxiety 0.85 0.82 1.09
[0.65, 1.10] [0.63, 1.07] [0.94, 1.27]
Honesty 0.98 0.96 0.98
[0.74, 1.28] [0.74, 1.25] [0.83, 1.15]
Industry 0.94 0.94 0.92
[0.73, 1.23] [0.71, 1.24] [0.79, 1.07]
Intellect 1.06 1.24 0.92
[0.79, 1.42] [0.86, 1.80] [0.78, 1.09]
Creativity 0.95 1.06 1.03
[0.76, 1.19] [0.79, 1.41] [0.88, 1.19]
Impulsivity 1.12 1.11 1.04
[0.88, 1.43] [0.86, 1.43] [0.89, 1.21]
Attention Seeking 0.78* 0.87 0.73*
[0.62, 0.99] [0.66, 1.14] [0.63, 0.85]
Order 1.11 0.94 0.92
[0.86, 1.42] [0.72, 1.22] [0.79, 1.06]
Authoritarianism 1.06 0.99 1.12
[0.81, 1.40] [0.74, 1.32] [0.96, 1.32]
Charisma 0.87 1.28 0.79*
[0.67, 1.14] [0.98, 1.68] [0.68, 0.91]
Trust 1.13 0.90 1.16
[0.87, 1.47] [0.68, 1.20] [0.99, 1.36]
Humor 1.41* 0.91 0.84*
[1.06, 1.88] [0.69, 1.19] [0.72, 0.98]
Emotional Expressiveness 0.81 1.30 0.95
[0.63, 1.05] [1.00, 1.69] [0.82, 1.11]
Art Appreciation 0.97 0.85 1.03
[0.75, 1.25] [0.67, 1.08] [0.88, 1.21]
Introspection 1.03 0.87 0.94
[0.78, 1.36] [0.68, 1.12] [0.80, 1.09]
Perfectionism 0.92 0.80 1.01
[0.72, 1.17] [0.61, 1.05] [0.86, 1.19]
Self Control 0.81 0.83 1.00
[0.62, 1.06] [0.65, 1.07] [0.85, 1.17]
Conformity 0.99 0.99 1.01
[0.76, 1.28] [0.78, 1.26] [0.86, 1.17]
Adaptability 1.08 1.18 1.02
[0.82, 1.42] [0.89, 1.55] [0.88, 1.19]
Easy Goingness 1.53* 1.22 1.23*
[1.13, 2.07] [0.94, 1.59] [1.05, 1.43]
Emotional Stability 1.10 1.11 0.87*
[0.84, 1.43] [0.85, 1.45] [0.75, 1.00]
Conservatism 0.98 1.00 0.82*
[0.75, 1.27] [0.77, 1.30] [0.71, 0.96]
SPI: 5 Factors
Agreeableness 0.76* 1.25 1.13
[0.60, 0.97] [0.92, 1.68] [0.96, 1.33]
Conscientiousness 0.90 0.80 0.90
[0.68, 1.19] [0.60, 1.06] [0.77, 1.06]
Extraversion 0.79 1.01 0.77*
[0.61, 1.03] [0.76, 1.35] [0.66, 0.89]
Neuroticism 1.17 1.42* 1.22*
[0.93, 1.49] [1.08, 1.86] [1.06, 1.41]
Openness 1.22 0.92 0.92
[0.94, 1.57] [0.69, 1.23] [0.79, 1.08]

Model accuracy

Overall accuracy

Kappa

Area Under Curve

Sensitivity analysis

Once we filter for adolescents living in the United States, approximately half our sample did not report either their height or weight. Given the sensitivity of body image, especially for the adolescent girls in our sample, we suspect these values are missing not at random (MNAR) and may impact the estimates here. To test for these effects, we imputed missing height and weight values using a principal components analysis approach, using only the other variables in the SAPA dataset that were not included in the analyses above. We repeated the logistic regression models with 10-fold cross validation, repeated 10 times, and report here the differences in significance across models and by term, as well as the estimates of the coefficients that were significant only in the complete data or in the imputed data.

Female

Male